* This script contains the code to replicate Tables 5, 6 and 7 in Ductor, L., Fafchamps, M., Goyal, S. and M. van der Leij. Social Networks and Research Output. The Review of Economics and Statistics.

/* Session setup.
   - Close any log left open by an earlier (possibly aborted) run; otherwise
     -log using- stops with "log file already open".
   - Load the data with -clear- so the script can be re-run when a modified
     dataset is still in memory; without it -use- refuses to overwrite it. */
capture log close
log using Tables5-7.log, replace
use prodndnp_fullsample.dta, clear
set matsize 6000
set more off

/*******OLS REGRESSIONS**************/

/* OLS out-of-sample program.

   Usage:  ols_os depvar [indepvars] [if]

   Fits an OLS regression of depvar on indepvars (standard errors clustered
   on auth) over the observations selected by [if], then predicts depvar for
   the observations with group==2 and a non-missing depvar and reports the
   out-of-sample root mean squared error.

   Results left behind for the caller:
     scalars   n1        number of observations used in the regression
               rmseols   in-sample RMSE, e(rmse)
               r2i1ols   in-sample R-squared, e(r2)
               rssols    sum of squared out-of-sample prediction errors
               nols      number of out-of-sample observations with a prediction
               rmseo2ols out-of-sample RMSE, (rssols/nols)^0.5
     variable  sqerrols  squared out-of-sample prediction error; it is kept on
                         purpose so the caller can build a loss differential
                         and must be dropped by the caller afterwards.

   Relies on the variables auth and group existing in the data in memory. */
capture program drop ols_os
program define ols_os
    syntax varlist(numeric min=1) [if]
    marksample touse

    /* The first variable of the list is the dependent variable; use it rather
       than a hard-coded name when forming predictions and errors. */
    gettoken depvar : varlist

    /* Remove work variables left over from an interrupted earlier call, so
       the generate/egen/predict commands below do not stop with
       "variable already defined". One capture per variable: a single -drop-
       with several names drops nothing if any one of them is absent. */
    foreach v in ybols sqerrols peols {
        capture drop `v'
    }

    /* Estimating the model */
    regress `varlist' if `touse', vce(cluster auth)
    scalar n1      = e(N)      /* number of observations in the regression */
    scalar rmseols = e(rmse)   /* in-sample RMSE */
    scalar r2i1ols = e(r2)     /* in-sample R-squared */

    /* Linear prediction for the out-of-sample group */
    predict ybols if group==2 & `depvar'<., xb

    /* Squared prediction errors, defined wherever a prediction exists */
    quietly generate sqerrols = (`depvar'-ybols)^2 if ybols<.

    /* Sum of squared prediction errors. total() is the current name of the
       egen function formerly called sum(). The total is stored as a constant
       on every predicted observation, so r(max) below recovers the total and
       r(N) the number of predicted observations. */
    quietly egen peols = total(sqerrols) if ybols<.
    quietly summarize peols
    scalar rssols = r(max)
    scalar nols   = r(N)
    scalar rmseo2ols = (rssols/nols)^0.5   /* RMSE out of sample */

    display n1 " Number of observations in the in-sample group"
    display rmseo2ols "  Pooled OLS out-of-sample RMSE"
    display nols " Number of observations in the out-of-sample group"

    /* sqerrols is deliberately not dropped here (see header). */
    drop peols ybols
end


/* MODEL 0
   Regress lprodf3 on y2-y27, nopapers and t2-t27 using group 1, then
   evaluate the prediction errors on group 2. The squared errors are kept in
   sqerrolsM0. */

regress lprodf3 y2-y27 nopapers t2-t27 if group==1

/* Flag the estimation sample of this regression; the ols_os calls further
   down are restricted to e==1. */
generate e = 1 if e(sample)

/* In-sample statistics taken from the stored estimation results */
scalar n1      = e(N)
scalar rmseols = e(rmse)
scalar r2i1ols = e(r2)
estat ic

/* Linear prediction for the group 2 observations with an observed outcome */
predict ybols if group==2 & lprodf3<., xb

/* Squared prediction errors and their total. The total is written as a
   constant on every predicted observation, so after -summarize- r(max) is
   the total and r(N) the number of predicted observations. */
quietly generate sqerrolsM0 = (lprodf3 - ybols)^2 if ybols<.
quietly egen peols = sum(sqerrolsM0) if ybols<.
quietly summarize peols
scalar rssols    = r(max)
scalar nols      = r(N)
scalar rmseo2ols = (rssols/(nols))^0.5

display n1 " Number of observations in the in-sample group"
display rmseo2ols "  Pooled OLS out-of-sample RMSE"
display nols " Number of observations in the out-of-sample group"

/* Work variables are removed; sqerrolsM0 stays in the data. */
drop ybols peols


/* MODEL 1
   Model 0 regressors plus lprodf3l-lprodf3l13, estimated on group 1 and
   evaluated on group 2. The squared prediction errors are kept in
   sqerrolsM1, which every Diebold-Mariano comparison below uses as the
   benchmark. */

regress lprodf3 y2-y27 nopapers t2-t27 lprodf3l-lprodf3l13 if group==1

/* In-sample statistics taken from the stored estimation results */
scalar n1      = e(N)
scalar rmseols = e(rmse)
scalar r2i1ols = e(r2)
estat ic

/* Linear prediction for the group 2 observations with an observed outcome */
predict ybols if group==2 & lprodf3<., xb

/* Squared prediction errors and their total: r(max) of the constant total
   is the total itself, r(N) the number of predicted observations. */
quietly generate sqerrolsM1 = (lprodf3 - ybols)^2 if ybols<.
quietly egen peols = sum(sqerrolsM1) if ybols<.
quietly summarize peols
scalar rssols    = r(max)
scalar nols      = r(N)
scalar rmseo2ols = (rssols/(nols))^0.5

display n1 " Number of observations in the in-sample group"
display rmseo2ols "  Pooled OLS out-of-sample RMSE"
display nols " Number of observations in the out-of-sample group"

/* Work variables are removed; sqerrolsM1 stays in the data. */
drop ybols peols


/* TABLE 5. Prediction accuracy of the unrestricted Models 1' and 2'*/
/**********MODEL 2. Table 5, row coauthors' productivity**********/

ols_os lprodf3 y2-y27 nopapers t2-t27  lnetprod1y lnetprod2y lnetprod3y lnetprod4y lnetprod5y lnetprod6y lnetprod7y lnetprod8y lnetprod9y lnetprod10y lnetprod11y lnetprod12y if e==1
/* Diebold-Mariano test.
   The loss differential is (squared error of this model) minus (squared
   error of Model 1), the same orientation as every other comparison in this
   file. This row previously used the opposite order, which reversed the sign
   of its reported coefficient and t-statistic relative to the other rows. */
gen diffsqerrlM2= sqerrols-sqerrolsM1
newey2 diffsqerrlM2 if group==2 & lprodf3<., lag(12)
/* sqerrols is left behind by ols_os and must be removed before the next call */
drop sqerrols diffsqerrlM2

/* TABLE 5, remaining rows (Model 2).
   Every row follows the same three steps:
     1. ols_os fits the model on e==1 and leaves the squared out-of-sample
        prediction errors in sqerrols;
     2. Diebold-Mariano test: newey2 is run on the loss differential
        sqerrols - sqerrolsM1 alone, over the out-of-sample observations,
        with the lag() truncation shown;
     3. sqerrols and the differential are dropped so the next row can
        recreate them. */

/* ---- Row: coauthors' coauthors productivity ---- */
ols_os lprodf3 y2-y27 nopapers t2-t27 ///
    lnetprod21y lnetprod22y lnetprod23y lnetprod24y lnetprod25y lnetprod26y ///
    lnetprod27y lnetprod28y lnetprod29y lnetprod210y lnetprod211y ///
    lnetprod212y-lnetprod215y ///
    if e==1
generate diffsqerrlM2 = sqerrols - sqerrolsM1
newey2 diffsqerrlM2 if group==2 & lprodf3<., lag(15)
drop diffsqerrlM2 sqerrols

/* ---- Row: Degree ---- */
ols_os lprodf3 y2-y27 nopapers t2-t27 ///
    degree1y degree2y degree3y degree4y degree5y degree6y degree7y ///
    degree8y degree9y degree10y degree11y degree12y-degree15y ///
    if e==1
generate diffsqerrlM2 = sqerrols - sqerrolsM1
newey2 diffsqerrlM2 if group==2 & lprodf3<., lag(15)
drop diffsqerrlM2 sqerrols

/* ---- Row: Degree 2 ---- */
ols_os lprodf3 y2-y27 nopapers t2-t27 ///
    degree21y degree22y degree23y degree24y degree25y degree26y degree27y ///
    degree28y degree29y degree210y degree211y degree212y degree213y degree214y ///
    if e==1
generate diffsqerrlM2 = sqerrols - sqerrolsM1
newey2 diffsqerrlM2 if group==2 & lprodf3<., lag(14)
drop diffsqerrlM2 sqerrols

/* ---- Row: Giant component ---- */
ols_os lprodf3 y2-y27 nopapers t2-t27 ///
    gc1y gc2y-gc15y ///
    if e==1
generate diffsqerrlM2 = sqerrols - sqerrolsM1
newey2 diffsqerrlM2 if group==2 & lprodf3<., lag(15)
drop diffsqerrlM2 sqerrols

/* ---- Row: Betweenness (entered together with the gc variables) ---- */
ols_os lprodf3 y2-y27 nopapers t2-t27 ///
    gc1y gc2y-gc15y ///
    lbet1y-lbet15y ///
    if e==1
generate diffsqerrlM2 = sqerrols - sqerrolsM1
newey2 diffsqerrlM2 if group==2 & lprodf3<., lag(15)
drop diffsqerrlM2 sqerrols

/* ---- Row: Closeness (entered together with the gc variables) ---- */
ols_os lprodf3 y2-y27 nopapers t2-t27 ///
    gc1y gc2y-gc15y ///
    lclos1y-lclos15y ///
    if e==1
generate diffsqerrlM2 = sqerrols - sqerrolsM1
newey2 diffsqerrlM2 if group==2 & lprodf3<., lag(15)
drop diffsqerrlM2 sqerrols

/* ---- Row: Working with a top 1% ---- */
ols_os lprodf3 y2-y27 nopapers t2-t27 ///
    neiq1fs1y neiq1fs2y neiq1fs3y neiq1fs4y neiq1fs5y neiq1fs6y neiq1fs7y ///
    neiq1fs8y neiq1fs9y neiq1fs10y neiq1fs11y neiq1fs12y neiq1fs13y neiq1fs14y ///
    if e==1
generate diffsqerrlM2 = sqerrols - sqerrolsM1
newey2 diffsqerrlM2 if group==2 & lprodf3<., lag(14)
drop diffsqerrlM2 sqerrols

/* TABLE 6. Prediction accuracy of the unrestricted Models 1' and 3'.
   Model 3 adds one set of network variables to the Model 1 regressors
   (y2-y27 nopapers t2-t27 lprodf3l-lprodf3l13). Every row:
     1. ols_os fits the model on e==1 and leaves the squared out-of-sample
        prediction errors in sqerrols;
     2. Diebold-Mariano test: newey2 on the loss differential
        sqerrols - sqerrolsM1 over the out-of-sample observations;
     3. the work variables are dropped before the next row. */

/* ---- Row: coauthors' productivity ---- */
ols_os lprodf3 y2-y27 nopapers t2-t27 lprodf3l-lprodf3l13 ///
    lnetprod1y-lnetprod12y ///
    if e==1
generate diffsqerrlM3 = sqerrols - sqerrolsM1
newey2 diffsqerrlM3 if group==2 & lprodf3<., lag(12)
drop diffsqerrlM3 sqerrols

/* ---- Row: coauthors' coauthors productivity ---- */
ols_os lprodf3 y2-y27 nopapers t2-t27 lprodf3l-lprodf3l13 ///
    lnetprod21y-lnetprod211y ///
    if e==1
generate diffsqerrlM3 = sqerrols - sqerrolsM1
newey2 diffsqerrlM3 if group==2 & lprodf3<., lag(11)
drop diffsqerrlM3 sqerrols

/* ---- Row: Degree ---- */
ols_os lprodf3 y2-y27 nopapers t2-t27 lprodf3l-lprodf3l13 ///
    degree1y-degree6y ///
    if e==1
generate diffsqerrlM3 = sqerrols - sqerrolsM1
newey2 diffsqerrlM3 if group==2 & lprodf3<., lag(6)
drop diffsqerrlM3 sqerrols

/* ---- Row: Degree 2 ---- */
ols_os lprodf3 y2-y27 nopapers t2-t27 lprodf3l-lprodf3l13 ///
    degree21y-degree25y ///
    if e==1
generate diffsqerrlM3 = sqerrols - sqerrolsM1
newey2 diffsqerrlM3 if group==2 & lprodf3<., lag(5)
drop diffsqerrlM3 sqerrols

/* ---- Row: Giant component ---- */
ols_os lprodf3 y2-y27 nopapers t2-t27 lprodf3l-lprodf3l13 ///
    gc1y-gc8y ///
    if e==1
generate diffsqerrlM3 = sqerrols - sqerrolsM1
newey2 diffsqerrlM3 if group==2 & lprodf3<., lag(8)
drop diffsqerrlM3 sqerrols

/* ---- Row: Betweenness (entered together with the gc variables) ---- */
ols_os lprodf3 y2-y27 nopapers t2-t27 lprodf3l-lprodf3l13 ///
    gc1y-gc9y ///
    lbet1y-lbet9y ///
    if e==1
generate diffsqerrlM3 = sqerrols - sqerrolsM1
newey2 diffsqerrlM3 if group==2 & lprodf3<., lag(9)
drop diffsqerrlM3 sqerrols

/* ---- Row: Closeness (entered together with the gc variables) ----
   NOTE(review): the gc range stops at gc7y while lclos runs to lclos10y and
   lag(10) is used; the Betweenness row uses matching ranges (gc1y-gc9y with
   lbet1y-lbet9y). Left exactly as found - confirm whether gc1y-gc7y is
   intended. */
ols_os lprodf3 y2-y27 nopapers t2-t27 lprodf3l-lprodf3l13 ///
    gc1y-gc7y ///
    lclos1y-lclos10y ///
    if e==1
generate diffsqerrlM3 = sqerrols - sqerrolsM1
newey2 diffsqerrlM3 if group==2 & lprodf3<., lag(10)
drop diffsqerrlM3 sqerrols

/* ---- Row: Working with a top 1% ---- */
ols_os lprodf3 y2-y27 nopapers t2-t27 lprodf3l-lprodf3l13 ///
    neiq1fs1y-neiq1fs13y ///
    if e==1
generate diffsqerrlM3 = sqerrols - sqerrolsM1
newey2 diffsqerrlM3 if group==2 & lprodf3<., lag(13)
drop diffsqerrlM3 sqerrols



/* TABLE 7. Prediction accuracy of the unrestricted multivariate models.
   All network variable sets enter jointly. Each row runs ols_os on e==1,
   then a Diebold-Mariano test (newey2 on sqerrols - sqerrolsM1 over the
   out-of-sample observations), then drops the work variables. */

/* ---- Row: Multivariate Model 2' (no lprodf3l-lprodf3l13 regressors) ---- */
ols_os lprodf3 y2-y27 nopapers t2-t27 ///
    lnetprod1y-lnetprod15y ///
    lnetprod21y-lnetprod215y ///
    degree1y-degree15y ///
    degree21y-degree215y ///
    gc1y-gc15y ///
    lbet1y-lbet15y ///
    lclos1y-lclos15y ///
    neiq1fs1y-neiq1fs15y ///
    if e==1
generate diffsqerrlM2 = sqerrols - sqerrolsM1
newey2 diffsqerrlM2 if group==2 & lprodf3<., lag(15)
drop diffsqerrlM2 sqerrols

/* ---- Row: Multivariate Model 3' (adds lprodf3l-lprodf3l13) ---- */
ols_os lprodf3 y2-y27 nopapers t2-t27 ///
    lprodf3l-lprodf3l13 ///
    lnetprod1y-lnetprod8y ///
    lnetprod21y-lnetprod28y ///
    degree1y-degree8y ///
    degree21y-degree28y ///
    gc1y-gc8y ///
    lbet1y-lbet8y ///
    lclos1y-lclos8y ///
    neiq1fs1y-neiq1fs8y ///
    if e==1
generate diffsqerrlM2 = sqerrols - sqerrolsM1
newey2 diffsqerrlM2 if group==2 & lprodf3<., lag(8)
drop diffsqerrlM2 sqerrols

/* Close the log opened at the top of the script */
log close

